/**********************************************************************
gc_6_descriptive_analysis.do

**********************************************************************/
**********
* SET UP *
**********
* start from an empty session and set session options *
clear all
set matsize 2000
set more 1

* location for dofiles *
cd "T:\_Projet_4915\dofiles"
* remember the resolved working directory in a global *
global dir "`c(pwd)'"
* quoted so that the command keeps working if the directory name ever contains a space *
cd "$dir"

*********
* GATES *
*********
* Data to work with: syn = synthetic, rl = real *
local ext "rl"

* Gender to process: men or women *
local gender "men"

* First observed age(s); every step loops over this list (24 25 26) *
global start_age "25"

* Occupational definition(s) to loop over (occ_version 1 2) *
global occ_version "2"

* Firm class definition(s) to loop over (1 2 3 4 5 6) *
global class_def "6"

* Step switches: a step runs only when its gate is 1 *

* STEP 1: prepare dataset for analysis *
local gate1 1

* STEP 3: summary statistics for all occupations *
local gate3 1

* STEP 4: more facts 1: career transitions *
local gate4 1

* STEP 5: more facts 2: lifecycle *
local gate5 1

* STEP 6: more facts 3: first spell *
local gate6 1

* STEP 6 (gate6b): more facts 3: first spell, selected *
local gate6b 1

* STEP 8: plot the distribution of earnings by occupation, all prime age *
local gate8 1

* STEP 9: plot the share of entrepreneurs over time, cohort by cohort *
local gate9 1

* STEP X: delete all intermediate datasets *
local gateX 1

* start log file *
* Close any log that is still open (capture ignores the error when there is none), *
* then open a text log named after the gender and data settings, replacing an existing file. *
quietly capture log close
quietly log using gc_6_descriptive_analysis_2_`gender'_`ext', text replace

* specify file locations *
* Folder globals hold paths without the drive letter; each one starts with a backslash. *
global project_folder "\_Projet_4915"
global data_folder "\_Projet_4915\DATA"
global output_folder "\_Projet_4915\ResultsFolder"
global temp "temp"

* Default folder prefix used when building dataset file names. *
* NOTE(review): Stata treats a backslash placed directly before a macro sign as an escape that *
* delays expansion, so this path presumably resolves correctly only because the global itself *
* begins with a backslash -- confirm the resolved path; forward slashes would avoid the issue. *
local datadir T:\${data_folder}\

*******************************************
* MAKE A GLOBAL VARIABLE FOR TODAY'S DATE *
*******************************************
* pull year, month abbreviation and day out of the S_DATE system macro by position *
local tyr = substr("$S_DATE", 8, 4)
local tmo = substr("$S_DATE", 4, 3)
local tmd = trim(substr("$S_DATE", 1, 2))

* day of month, left-padded with a zero when it has one digit *
local wl = length("`tmd'")
if `wl' == 1 {
	local tmd2 = "0" + "`tmd'"
}
else {
	local tmd2 = "`tmd'"
}

* turn the month abbreviation into a number, left-padded the same way *
local di = "`tmd2'" + "`tmo'" + "`tyr'"
local edate = date("`di'", "DMY")
local mono = month(`edate')
local ml = length("`mono'")
if `ml' == 1 {
	local mono2 = "0" + "`mono'"
}
else {
	local mono2 = "`mono'"
}

* final stamp: year, month, day glued together (used in the exported file names) *
global date = "`tyr'" + "`mono2'" + "`tmd2'"

****************************************
* DEFINE LITTLE PROGRAMS TO PRINT TIME *
****************************************
program starttime
	* write a start-of-step time stamp to the results window and log *
	display "Started processing at $S_TIME on $S_DATE"
end

program endtime
	* write an end-of-step time stamp to the results window and log *
	display "Finished processing at $S_TIME on $S_DATE"
end

************************
* START OF THE PROGRAM *
************************

****************************************
* STEP 1: prepare dataset for analysis *
****************************************
disp "***** Started processing STEP 1 *****"
starttime
if 1 == `gate1' ///
{
	disp "***** STEP 1: prepare dataset for analysis *****"
	foreach a of global start_age ///
	{
		foreach v of global occ_version ///
		{	
			foreach d of global class_def ///
			{
				
				* load the analysis file for this gender, start age, occupation version and data type *
				local datadir T:\${data_folder}\

				use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_`ext'.dta", clear

				*****************************************************************
				* ACTION = two-digit code: occupation digit, then firm class    *
				*****************************************************************
				* first digit: occupation *
				* 1 = paid employee, 2 = self-employed, 3 = entrepreneur, 9 = unemployed (coded 0 in the data) *
				tostring(occ_v`v'), gen(first_digit)
				replace first_digit = "9" if occ_v`v' == 0

				* second digit: firm class *
				* the firm class definition picks one four-category variable; its categories 3 and 4 *
				* are pooled into class 3, and 0 is left where none of the categories applies *
				gen firm_class = 0
				if inlist(`d', 1, 2, 3, 4, 5, 6) {
					local class_vars f1_employment_4c f2_employment_4c f1_rev_perL_4c f2_rev_perL_4c f1_payroll_perL_4c f2_payroll_perL_4c
					local class_var : word `d' of `class_vars'
					replace firm_class = 1 if `class_var' == 1
					replace firm_class = 2 if `class_var' == 2
					replace firm_class = 3 if inlist(`class_var', 3, 4)
				}

				* occupations 0, 2 and 3 carry no firm class, so their second digit is forced to 0 *
				tostring(firm_class), gen(second_digit)
				replace second_digit = "0" if inlist(occ_v`v', 0, 2, 3)

				* glue the digits together and convert back to a number *
				gen action = first_digit + second_digit
				destring(action), replace
				drop second_digit
				
				*****************************************************************************
				* create categorical variable to denote action over the past 11 periods *
				*****************************************************************************
				* period = running observation number within person, in tax-year order *
				capture drop period
				sort pid tax_yr
				by pid: gen period = _n
				tab period

				* lag i = action i observations earlier for the same person (missing when unavailable) *
				* lags count observations, not calendar years *
				forvalues i = 1/11 {
					by pid: gen L`i'action = action[_n - `i'] if period > `i'
				}

				***********************************
				* construct vectors of experience *
				***********************************
				* for each action code, count how many of the 11 lags equal that code *
				foreach act in 11 12 13 20 30 90 {
					forvalues i = 1/11 {
						gen flag`i' = (L`i'action == `act')
					}
					egen exper_`act' = rowtotal(flag*)
					drop flag*
				}

				* keep only the first two lags, under the names Laction and L2action *
				* the two keepers are parked under other names so that the wildcard drop below, which *
				* removes every remaining variable whose name starts with a capital L, spares them *
				rename L1action last_action
				rename L2action lastlast_action
				* disabled: fall back on the second lag when the last action is nonemployment *
				*replace last_action = lastlast_action if last_action == 90 & lastlast_action != .
				drop L*
				rename last_action Laction
				rename lastlast_action L2action

				* disabled: set initial condition for Laction to action in the initial period *
				*replace Laction = action if age == `a'
	
				* main outcome: natural log of earnings under this occupation version *
				gen ln_y = ln(y_v`v')

				* numeric copy of the person ID *
				destring pid, gen(pid_int)

				* paid-employment experience pooled over the three firm classes *
				gen exper_10 = exper_11 + exper_12 + exper_13

				* age measured from the start age, and its square *
				gen age_lvl = age-`a'
				gen age_sq = age_lvl*age_lvl

				* squared experience terms *
				foreach k in 10 11 12 13 20 30 90 {
					gen exper_`k'_sq = exper_`k'*exper_`k'
				}

				************
				* CLEAN UP *
				************
				sort pid tax_yr

				* write the intermediate dataset twice: current format in the data folder, *
				* and old format (version 12) in the gc_esamples subfolder *
				save "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", replace
				local datadir T:\${data_folder}\gc_esamples\
				saveold "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", replace v(12)
				clear
			}
		}
	}
	disp "***** STEP 1: prepare dataset for analysis (COMPLETED) *****"
}
disp "***** Finished processing STEP 1 *****"
endtime

**************************************************
* STEP 3: summary statistics for all occupations *
**************************************************
disp "***** Started processing STEP 3 *****"
starttime
if 1 == `gate3' ///
{
disp "***** STEP 3: summary statistics for all occupations *****"
foreach a of global start_age ///
{
foreach v of global occ_version ///
{	
foreach d of global class_def ///
{

	local datadir T:\${data_folder}\

	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", clear

	* construct an initial condition variable: the lagged action observed at the start age, *
	* copied to every row of the person *
	* NOTE(review): STEP 1 builds Laction as the action one observation earlier and leaves it *
	* missing in a person's first observation; if the start age is that first observation, *
	* action_entry is missing for everyone -- confirm whether action was meant here *
	gen initial_tmp = .
	replace initial_tmp = Laction if age == `a'
	bys pid_int: egen action_entry = max(initial_tmp)
	drop initial_tmp

	* shorter names for the entry-location covariates *
	rename res_pc_inc_entry inc_entry
	rename res_pc_shr_bus_entry shr_bus_entry
	rename res_pc_popden_entry popden_entry
	rename res_pc_busipopden_entry busipopden_entry

	* top-quartile indicators: 1 when the covariate is at or above the 75th percentile *
	* measured among observations at the start age, 0 when below *
	* Missing covariates give a missing indicator: Stata ranks missing above every number, *
	* so an unguarded >= comparison would have flagged them as top quartile. *
	* summarize is run quietly rather than under capture so that a failure stops the run here *
	* instead of surfacing as a syntax error on the next line. *
	foreach var in inc_entry shr_bus_entry popden_entry busipopden_entry {
		quietly summarize `var' if age == `a', detail
		display "`var' 75th pctile is `r(p75)'"
		gen `var'_cat = (`var' >= r(p75)) if !missing(`var')
	}

	* population and business owners density per square km *
	* (rescaled after the indicators are built; the indicators do not depend on the scale) *
	replace popden_entry = popden_entry*1000000
	replace busipopden_entry = busipopden_entry*1000000

	* indicator for city-size category 1 at entry *
	gen big_city_entry = 0
	replace big_city_entry = 1 if res_city_size_entry == 1
	
	*** stack one row of means and one row of sds per occupation, plus pooled rows ***
	* The stack is built in gc_reduced_tmp2: the first group creates the file and every *
	* later pass appends the existing file to its own row and saves again. *
	local statvars ln_y age exper_10 exper_11 exper_12 exper_13 exper_20 exper_30 inc_entry inc_entry_cat shr_bus_entry shr_bus_entry_cat big_city_entry
	local esampdir T:\${data_folder}\gc_esamples\

	*** means (the pooled group all is labelled action 0) ***
	foreach grp in 11 12 13 20 30 90 all {
		preserve
		if "`grp'" != "all" {
			keep if action == `grp'
		}
		* n_ind marks the first row of each person so that its count is the number of individuals *
		sort pid_int tax_yr
		by pid_int: gen n_ind = 1 if _n == 1
		* keep a copy of the estimation sample in old (version 12) format *
		saveold "`esampdir'gc_ss_occ_`grp'_start`a'_occ_v`v'_def`d'.dta", replace v(12)
		sum `statvars'
		collapse (mean) `statvars' (count) ind=n_ind obs=pid_int
		local lbl `grp'
		if "`grp'" == "all" {
			local lbl 0
		}
		gen action = "`lbl'"
		gen stat_type = "mean"
		* save it or stack it *
		if "`grp'" != "11" {
			append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		}
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}

	*** standard deviations, same groups, always appended to the stack ***
	foreach grp in 11 12 13 20 30 90 all {
		preserve
		if "`grp'" != "all" {
			keep if action == `grp'
		}
		collapse (sd) `statvars'
		local lbl `grp'
		if "`grp'" == "all" {
			local lbl 0
		}
		gen action = "`lbl'"
		gen stat_type = "sd"
		append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}
	
	*** put means and sds side by side, one row per occupation ***
	local statvars ln_y age exper_10 exper_11 exper_12 exper_13 exper_20 exper_30 inc_entry inc_entry_cat shr_bus_entry shr_bus_entry_cat big_city_entry

	use "`datadir'gc_reduced_tmp2_`ext'.dta", clear

	* means go to their own file *
	preserve
	keep if stat_type == "mean"
	drop stat_type
	order action
	sort action
	save "`datadir'gc_reduced_tmp3_`ext'.dta", replace
	restore

	* sds stay in memory with an _sd suffix; the paired list fixes the final column order *
	keep if stat_type == "sd"
	drop stat_type
	order action
	sort action
	local paired
	foreach x of local statvars {
		rename `x' `x'_sd
		local paired `paired' `x' `x'_sd
	}

	* bring the means back; update also fills obs and ind, which are missing on the sd rows *
	merge 1:1 action using "`datadir'gc_reduced_tmp3_`ext'.dta", update
	drop _merge

	* row order: pooled (0), then 30, 11, 12, 13, 20, 90 *
	gen sort1 = 1*(action == "30") + 2*(action == "11") + 3*(action == "12") + 4*(action == "13") + 6*(action == "20") + 7*(action == "90")
	sort sort1
	drop sort1
	order action obs ind `paired'
	save "`datadir'gc_reduced_tmp4_`ext'.dta", replace

	* write the table to the output folder *
	local outdir T:\${output_folder}\
	disp "`outdir'"
	export excel using "`outdir'gc_${date}_ss_all_occupations_`gender'_start`a'_occ_v`v'_def`d'.xlsx", firstrow(variables) replace
	clear

	* delete intermediate datasets *
	local datadir T:\${data_folder}\
	foreach k in 2 3 4 {
		capture erase "`datadir'gc_reduced_tmp`k'_`ext'.dta"
	}

}
}
}
disp "***** STEP 3: summary statistics for all occupations (COMPLETED) *****"
}
disp "***** Finished processing STEP 3 *****"
endtime

********************************************
* STEP 4: more facts 1: career transitions *
********************************************
disp "***** Started processing STEP 4 *****"
starttime
if 1 == `gate4' ///
{
disp "***** STEP 4: more facts 1: career transitions *****"
foreach a of global start_age ///
{
foreach v of global occ_version ///
{	
foreach d of global class_def ///
{

	local datadir T:\${data_folder}\

	*********************************
	* Row transition matrix, age 30 *
	*********************************
	* rows = last action, columns = shares of the current action *
	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", clear

	keep if age == 30

	* one dummy per distinct value of action, numbered in ascending order of the value *
	* NOTE(review): the code below assumes exactly six distinct values at age 30 -- confirm *
	tab action, gen(action_shr)

	local shares action_shr1 action_shr2 action_shr3 action_shr4 action_shr5 action_shr6
	local esampdir T:\${data_folder}\gc_esamples\

	*** mean shares by last action (the pooled group all is labelled Laction 0) ***
	foreach grp in 11 12 13 20 30 90 all {
		preserve
		if "`grp'" != "all" {
			keep if Laction == `grp'
		}
		* n_ind marks the first row of each person so that its count is the number of individuals *
		sort pid_int tax_yr
		by pid_int: gen n_ind = 1 if _n == 1
		saveold "`esampdir'gc_ss_row30_L`grp'_start`a'_occ_v`v'_def`d'.dta", replace v(12)
		sum `shares'
		collapse (mean) `shares' (count) ind=n_ind obs=pid_int
		local lbl `grp'
		if "`grp'" == "all" {
			local lbl 0
		}
		gen Laction = "`lbl'"
		gen stat_type = "mean"
		* save it or stack it *
		if "`grp'" != "11" {
			append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		}
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}

	*** standard deviations, same groups, always appended to the stack ***
	foreach grp in 11 12 13 20 30 90 all {
		preserve
		if "`grp'" != "all" {
			keep if Laction == `grp'
		}
		collapse (sd) `shares'
		local lbl `grp'
		if "`grp'" == "all" {
			local lbl 0
		}
		gen Laction = "`lbl'"
		gen stat_type = "sd"
		append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}
	
	*** put means and sds side by side, one row per last action ***
	use "`datadir'gc_reduced_tmp2_`ext'.dta", clear

	* means go to their own file *
	preserve
	keep if stat_type == "mean"
	drop stat_type
	order Laction
	sort Laction
	save "`datadir'gc_reduced_tmp3_`ext'.dta", replace
	restore

	* sds stay in memory with an _sd suffix; the paired list fixes the final column order *
	keep if stat_type == "sd"
	drop stat_type
	order Laction
	sort Laction
	local paired
	forvalues j = 1/6 {
		rename action_shr`j' action_shr`j'_sd
		local paired `paired' action_shr`j' action_shr`j'_sd
	}

	* bring the means back; update also fills obs and ind, which are missing on the sd rows *
	merge 1:1 Laction using "`datadir'gc_reduced_tmp3_`ext'.dta", update
	drop _merge

	* row order: pooled (0), then 11, 12, 13, 20, 30, 90 *
	gen sort1 = 1*(Laction == "11") + 2*(Laction == "12") + 3*(Laction == "13") + 5*(Laction == "20") + 6*(Laction == "30") + 7*(Laction == "90")
	sort sort1
	drop sort1
	order Laction obs ind `paired'
	save "`datadir'gc_reduced_tmp4_`ext'.dta", replace

	* write the table to the output folder *
	local outdir T:\${output_folder}\
	disp "`outdir'"
	export excel using "`outdir'gc_${date}_ss_rowtransit30_`gender'_start`a'_occ_v`v'_def`d'.xlsx", firstrow(variables) replace
	clear

	* delete intermediate datasets *
	local datadir T:\${data_folder}\
	foreach k in 2 3 4 {
		capture erase "`datadir'gc_reduced_tmp`k'_`ext'.dta"
	}
	
	************************************
	* Column transition matrix, age 30 *
	************************************
	* rows = current action, columns = shares of the last action *
	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", clear

	keep if age == 30

	* one dummy per distinct value of Laction, numbered in ascending order of the value *
	* NOTE(review): the code below assumes exactly six distinct values at age 30 -- confirm *
	tab Laction, gen(Laction_shr)

	local shares Laction_shr1 Laction_shr2 Laction_shr3 Laction_shr4 Laction_shr5 Laction_shr6
	local esampdir T:\${data_folder}\gc_esamples\

	*** mean shares by current action (the pooled group all is labelled action 0) ***
	foreach grp in 11 12 13 20 30 90 all {
		preserve
		if "`grp'" != "all" {
			keep if action == `grp'
		}
		* n_ind marks the first row of each person so that its count is the number of individuals *
		sort pid_int tax_yr
		by pid_int: gen n_ind = 1 if _n == 1
		saveold "`esampdir'gc_ss_col30_A`grp'_start`a'_occ_v`v'_def`d'.dta", replace v(12)
		sum `shares'
		collapse (mean) `shares' (count) ind=n_ind obs=pid_int
		local lbl `grp'
		if "`grp'" == "all" {
			local lbl 0
		}
		gen action = "`lbl'"
		gen stat_type = "mean"
		* save it or stack it *
		if "`grp'" != "11" {
			append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		}
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}

	*** standard deviations, same groups, always appended to the stack ***
	foreach grp in 11 12 13 20 30 90 all {
		preserve
		if "`grp'" != "all" {
			keep if action == `grp'
		}
		collapse (sd) `shares'
		local lbl `grp'
		if "`grp'" == "all" {
			local lbl 0
		}
		gen action = "`lbl'"
		gen stat_type = "sd"
		append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}
	
	*** put means and sds side by side, one row per current action ***
	use "`datadir'gc_reduced_tmp2_`ext'.dta", clear

	* means go to their own file *
	preserve
	keep if stat_type == "mean"
	drop stat_type
	order action
	sort action
	save "`datadir'gc_reduced_tmp3_`ext'.dta", replace
	restore

	* sds stay in memory with an _sd suffix; the paired list fixes the final column order *
	keep if stat_type == "sd"
	drop stat_type
	order action
	sort action
	local paired
	forvalues j = 1/6 {
		rename Laction_shr`j' Laction_shr`j'_sd
		local paired `paired' Laction_shr`j' Laction_shr`j'_sd
	}

	* bring the means back; update also fills obs and ind, which are missing on the sd rows *
	merge 1:1 action using "`datadir'gc_reduced_tmp3_`ext'.dta", update
	drop _merge

	* row order: pooled (0), then 11, 12, 13, 20, 30, 90 *
	gen sort1 = 1*(action == "11") + 2*(action == "12") + 3*(action == "13") + 5*(action == "20") + 6*(action == "30") + 7*(action == "90")
	sort sort1
	drop sort1
	order action obs ind `paired'
	save "`datadir'gc_reduced_tmp4_`ext'.dta", replace

	* write the table to the output folder *
	local outdir T:\${output_folder}\
	disp "`outdir'"
	export excel using "`outdir'gc_${date}_ss_coltransit30_`gender'_start`a'_occ_v`v'_def`d'.xlsx", firstrow(variables) replace
	clear

	* delete intermediate datasets *
	local datadir T:\${data_folder}\
	foreach k in 2 3 4 {
		capture erase "`datadir'gc_reduced_tmp`k'_`ext'.dta"
	}
	
}
}
}
disp "***** STEP 4: more facts 1: career transitions (COMPLETED) *****"
}
disp "***** Finished processing STEP 4 *****"
endtime

****************************************************
* STEP 5: more facts 2: choices over the lifecycle *
****************************************************
disp "***** Started processing STEP 5 *****"
starttime
if 1 == `gate5' ///
{
disp "***** STEP 5: more facts 2: lifecycle *****"
foreach a of global start_age ///
{
foreach v of global occ_version ///
{	
foreach d of global class_def ///
{

	local datadir T:\${data_folder}\
	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", clear

	* one dummy per distinct value of action, numbered in ascending order of the value *
	* NOTE(review): the code below assumes exactly six distinct values -- confirm *
	tab action, gen(action_shr)

	local shares action_shr1 action_shr2 action_shr3 action_shr4 action_shr5 action_shr6
	local esampdir T:\${data_folder}\gc_esamples\

	*** mean action shares by age: 0 to 11 years after the start age, then all ages pooled ***
	foreach ag in 0 1 2 3 4 5 6 7 8 9 10 11 all {
		preserve
		if "`ag'" != "all" {
			keep if age-`a' == `ag'
		}
		* n_ind marks the first row of each person so that its count is the number of individuals *
		sort pid_int tax_yr
		by pid_int: gen n_ind = 1 if _n == 1
		saveold "`esampdir'gc_ss_lifecycle_age`ag'_start`a'_occ_v`v'_def`d'.dta", replace v(12)
		sum `shares'
		collapse (mean) `shares' (count) ind=n_ind obs=pid_int
		* age is stored as text: the actual age, or 0 for the pooled row *
		local agelbl 0
		if "`ag'" != "all" {
			local agelbl = `ag' + `a'
		}
		gen age = "`agelbl'"
		gen stat_type = "mean"
		* save it or stack it *
		if "`ag'" != "0" {
			append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		}
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}

	*** standard deviations, same groups, always appended to the stack ***
	foreach ag in 0 1 2 3 4 5 6 7 8 9 10 11 all {
		preserve
		if "`ag'" != "all" {
			keep if age-`a' == `ag'
		}
		collapse (sd) `shares'
		local agelbl 0
		if "`ag'" != "all" {
			local agelbl = `ag' + `a'
		}
		gen age = "`agelbl'"
		gen stat_type = "sd"
		append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}
	
	*** put means and sds side by side, one row per age ***
	use "`datadir'gc_reduced_tmp2_`ext'.dta", clear

	* means go to their own file *
	preserve
	keep if stat_type == "mean"
	drop stat_type
	order age
	sort age
	save "`datadir'gc_reduced_tmp3_`ext'.dta", replace
	restore

	* sds stay in memory with an _sd suffix; the paired list fixes the final column order *
	keep if stat_type == "sd"
	drop stat_type
	order age
	sort age
	local paired
	forvalues j = 1/6 {
		rename action_shr`j' action_shr`j'_sd
		local paired `paired' action_shr`j' action_shr`j'_sd
	}

	* bring the means back; update also fills obs and ind, which are missing on the sd rows *
	merge 1:1 age using "`datadir'gc_reduced_tmp3_`ext'.dta", update
	drop _merge

	* age is text here, so rows are in string order (the pooled row 0 comes first) *
	sort age
	order age obs ind `paired'
	save "`datadir'gc_reduced_tmp4_`ext'.dta", replace

	* write the table to the output folder *
	local outdir T:\${output_folder}\
	disp "`outdir'"
	export excel using "`outdir'gc_${date}_ss_lifecycle_`gender'_start`a'_occ_v`v'_def`d'.xlsx", firstrow(variables) replace
	clear

	* delete intermediate datasets *
	local datadir T:\${data_folder}\
	foreach k in 2 3 4 {
		capture erase "`datadir'gc_reduced_tmp`k'_`ext'.dta"
	}

	
	* sample for the earnings-by-age profiles *
	local datadir T:\${data_folder}\
	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", clear
	sort pid_int tax_yr

	* leave out nonemployment (action 90) *
	drop if action == 90

	* leave out the first and the last age of the 12-year window *
	* These were hard-coded as 25 and 36, which only matches a start age of 25; they are now *
	* derived from the start age so that start ages 24 and 26 drop the matching endpoints. *
	* NOTE(review): the reason for excluding the endpoint ages is not documented -- confirm *
	drop if age == `a'
	drop if age == `a' + 11

	* keep a copy of the sample in old (version 12) format, then point back at the data folder *
	local datadir T:\${data_folder}\gc_esamples\
	saveold "`datadir'gc_ss_lifecycle_lny_start`a'_occ_v`v'_def`d'.dta", replace v(12)
	local datadir T:\${data_folder}\
	
	*** stack mean and sd of log earnings by age for three groupings ***
	* The mean pass creates gc_reduced_tmp2 and also records the cell size (obs); every *
	* later pass appends the existing file to its own rows and saves again. *
	foreach stat in mean sd {
		local nobs
		if "`stat'" == "mean" {
			local nobs "(count) obs=pid_int"
		}

		* (1) each action separately *
		preserve
		collapse (`stat') ln_y `nobs', by(action age)
		tostring(age), replace
		tostring(action), replace
		gen stat_type = "`stat'"
		* save it or stack it *
		if "`stat'" != "mean" {
			append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		}
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore

		* (2) what remains after removing actions 20 and 30, labelled action 10 *
		preserve
		drop if inlist(action, 20, 30)
		if "`stat'" == "mean" {
			* keep a copy of this subsample in old (version 12) format *
			sort pid_int tax_yr
			saveold "T:\${data_folder}\gc_esamples\gc_ss_lifecycle_lny_act10_start`a'_occ_v`v'_def`d'.dta", replace v(12)
		}
		collapse (`stat') ln_y `nobs', by(age)
		gen action = "10"
		tostring(age), replace
		gen stat_type = "`stat'"
		append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore

		* (3) all actions pooled, labelled action 0 *
		preserve
		collapse (`stat') ln_y `nobs', by(age)
		gen action = "0"
		tostring(age), replace
		gen stat_type = "`stat'"
		append using "`datadir'gc_reduced_tmp2_`ext'.dta"
		save "`datadir'gc_reduced_tmp2_`ext'.dta", replace
		restore
	}
	
	*** put means and sds side by side, one row per action and age ***
	use "`datadir'gc_reduced_tmp2_`ext'.dta", clear

	* means go to their own file *
	preserve
	keep if stat_type == "mean"
	drop stat_type
	order action age
	sort action age
	save "`datadir'gc_reduced_tmp3_`ext'.dta", replace
	restore

	* sds stay in memory under the name ln_y_sd *
	keep if stat_type == "sd"
	drop stat_type
	order action age
	sort action age
	rename ln_y ln_y_sd

	* bring the means back; update also fills obs, which is missing on the sd rows *
	merge 1:1 age action using "`datadir'gc_reduced_tmp3_`ext'.dta", update
	drop _merge
	sort action age
	order age action obs ln_y ln_y_sd
	save "`datadir'gc_reduced_tmp4_`ext'.dta", replace

	* write the table to the output folder *
	local outdir T:\${output_folder}\
	disp "`outdir'"
	export excel using "`outdir'gc_${date}_ss_lifecycle_lny_`gender'_start`a'_occ_v`v'_def`d'.xlsx", firstrow(variables) replace
	clear

	* delete intermediate datasets *
	local datadir T:\${data_folder}\
	foreach k in 2 3 4 {
		capture erase "`datadir'gc_reduced_tmp`k'_`ext'.dta"
	}

}
}
}
disp "***** STEP 5: more facts 2: lifecycle (COMPLETED) *****"
}
disp "***** Finished processing STEP 5 *****"
endtime

*******************************************
* STEP 6: more facts 3: first spell stuff *
*******************************************
* For every (start age, occupation version, firm class definition) combination
* this step loads the analysis panel, isolates the first entrepreneurial spell
* of each person (action == 30) and writes to the log:
*   - migration flows over all person-years,
*   - the number of startups per entrepreneur (all spells, then first spell),
*   - industry and migration flows at entry into entrepreneurship,
*   - log earnings at entry and 1 to 5 years later, by experience group and origin.
* Side effects: saves gc_ss_firstspell_t0 ... t5 datasets in the gc_esamples folder.
disp "***** Started processing STEP 6 *****"
starttime
if 1 == `gate6' {
disp "***** STEP 6: more facts 3: first spell stuff *****"
foreach a of global start_age {
foreach v of global occ_version {
foreach d of global class_def {

	local datadir T:\${data_folder}\
	
	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", clear
	
	display "......................................................................................."
	display "... FIRST SPELL STUFF FOR `gender', START AGE: `a', OCC V: `v', FIRM CLASS: DEF `d' ..."
	display "......................................................................................."

	* the lags below index rows with _n inside each pid, so make the
	* chronological ordering explicit instead of relying on the stored sort order
	sort pid tax_yr

	* one and two period lags of the industry codes (2, 3 and 4 digit NAICS) *
	forvalues i = 1/2 {
		foreach k in 2 3 4 {
			gen L`i'naics`k' = .
			by pid: replace L`i'naics`k' = naics`k'_int[_n - `i'] if period > `i'
		}
	}
	
	* one and two period lags of the place of residence (CMA code, province) *
	forvalues i = 1/2 {
		foreach geo in res_cma_code res_prov {
			gen L`i'`geo' = ""
			by pid: replace L`i'`geo' = `geo'[_n - `i'] if period > `i'
		}
	}
	
	* identify first and last year as entrepreneur in the data *
	gen tmp = tax_yr if action == 30
	by pid: egen first_yr_ent = min(tmp)
	by pid: egen last_yr_ent = max(tmp)
	drop tmp
	
	* migration flows *
	* counts are taken over rows with current and lagged residence information;
	* a temporary flag is used so the full dataset never has to be preserved
	disp "MIGRATION FLOWS, ALL OBS"
	gen byte geo_ok = L1res_prov != "" & L1res_cma_code != "" & res_prov != "" & res_cma_code != ""
	count if geo_ok
	count if geo_ok & res_cma_code != L1res_cma_code
	count if geo_ok & res_prov != L1res_prov
	drop geo_ok
	
	* keep only individuals who have an entrepreneurial spell in the data *
	keep if first_yr_ent != .
	
	* drop observations before first year as entrepreneur or after last year as entrepreneur *
	drop if tax_yr < first_yr_ent
	drop if tax_yr > last_yr_ent
	
	* total number of startups *
	* counter flags one row per (person, non-blank startup id) pair, so its
	* within-person total is the number of distinct startups
	disp "NUMBER OF STARTUPS"
	bysort pid_int eid_startup: gen counter = (_n == 1) & (eid_startup != "")
	bysort pid_int: egen N_startup = total(counter)
	drop counter
	tab N_startup if tax_yr==first_yr_ent
	drop N_startup
	sort pid tax_yr
	
	* keep track of where entrepreneur is originally coming from. *
	* the second lag is used when the first lag of action equals 90
	gen tmp_laction = Laction if tax_yr == first_yr_ent
	replace tmp_laction = L2action if tmp_laction == 90 & tax_yr == first_yr_ent
	by pid: egen origin = max(tmp_laction)
	drop tmp_laction
	
	* keep track of industry entrepreneur is originally coming from. *
	* the second lag is used when the first lag is missing
	foreach k in 2 3 4 {
		gen tmp_lnaics`k' = L1naics`k' if tax_yr == first_yr_ent
		replace tmp_lnaics`k' = L2naics`k' if tmp_lnaics`k' == . & tax_yr == first_yr_ent
		by pid: egen origin_naics`k' = max(tmp_lnaics`k')
		drop tmp_lnaics`k'
	}
	
	* keep track of location entrepreneur is originally from. *
	* The current and lagged CMA strings are encoded against ONE shared value
	* label, so a given CMA receives the same integer in all three variables.
	* Separate egen group() calls number each variable on its own set of
	* distinct values, which makes res_cma_int and origin_res_cma incomparable
	* whenever those sets differ.
	capture label drop gc_cma_lbl
	encode L1res_cma_code, generate(L1res_cma_int) label(gc_cma_lbl)
	encode L2res_cma_code, generate(L2res_cma_int) label(gc_cma_lbl)
	encode res_cma_code, generate(res_cma_int) label(gc_cma_lbl)
	gen tmp_lres_cma = L1res_cma_int if tax_yr == first_yr_ent & L1res_prov != ""
	replace tmp_lres_cma = L2res_cma_int if tmp_lres_cma == . & tax_yr == first_yr_ent & L2res_prov != ""
	by pid: egen origin_res_cma = max(tmp_lres_cma)
	drop tmp_lres_cma
	
	* keep only first entrepreneurial spell *
	* while any person has a gap in tax years, drop the last year of that
	* person and re-check; the loop ends when every remaining spell is contiguous
	keep if action == 30
	local n_gappy = 1
	while `n_gappy' != 0 {
		sort pid tax_yr
		by pid: egen first_year = min(tax_yr)
		by pid: egen last_year = max(tax_yr)
		by pid: gen obs = _N
		gen potential_obs = last_year-first_year+1
		count if potential_obs > obs
		local n_gappy = r(N)
		display `n_gappy'
		* drops nothing once no gaps are left
		drop if tax_yr == last_year & potential_obs > obs
		drop first_year last_year obs potential_obs
	}
	
	disp "NUMBER OF STARTUPS IN FIRST ENTREPRENEURIAL SPELL ONLY"
	bysort pid_int eid_startup: gen counter = (_n == 1) & (eid_startup != "")
	bysort pid_int: egen N_startup = total(counter)
	drop counter
	tab N_startup if tax_yr==first_yr_ent
	drop N_startup
	
	* identify first and last year of first entrepreneurial spell *
	sort pid tax_yr
	by pid: egen first_yr_spell1 = min(tax_yr)
	by pid: egen last_yr_spell1 = max(tax_yr)
	
	* display number of entrepreneurs in the data*
	count if tax_yr == first_yr_ent
	
	* identify individuals observed in the main panel X years after first entry into entrepreneurship, regardless of where they are in that year *
	* note: a missing exit_yr compares as larger than any year, so the flag is 1
	forvalues i = 1/5 {
		gen alive_f`i' = first_yr_ent+`i'<=exit_yr
	}
	
	* identify individuals who survive at least X years in entrepreneurship after first entry *
	forvalues i = 1/5 {
		gen ent_f`i' = first_yr_ent+`i'<=last_yr_spell1
	}

	* construct potential experience groups *
	* years since the start age, pooled into groups 0, 1 (1-2), 3 (3-4), 5 (5+);
	* the missing() guard keeps a missing age from being pooled into the top
	* group, because missing compares as larger than any number
	gen exp_grp =age-`a'
	replace exp_grp = 1 if exp_grp == 2
	replace exp_grp = 3 if exp_grp == 4
	replace exp_grp = 5 if exp_grp > 5 & !missing(exp_grp)
	
	display "report some summary statistics about first entrepreneurial spell"
	* first year as entrepreneur *
	preserve
	keep if tax_yr == first_yr_spell1
	
	* display number of entrepreneurs *
	count
	* display number of entrepreneurs with non-missing naics, and how many stay in the same industry *
	disp "INDUSTRY FLOWS, ENTREPRENEURS"
	foreach k in 2 3 4 {
		count if naics`k'_int !=. & origin_naics`k' != .
		count if naics`k'_int == origin_naics`k' & naics`k'_int !=. & origin_naics`k' != .
	}

	* display number of entrepreneurs with non-missing province, and how many stay in the same CMA *
	disp "MIGRATION FLOWS, ENTREPRENEURS"
	count if (res_prov != "") & (L1res_prov != "")
	count if (res_cma_int == origin_res_cma) & (res_prov != "") & (L1res_prov != "")

	* summarize entrepreneurial earnings in first year *
	local datadir T:\${data_folder}\gc_esamples\
	saveold "`datadir'gc_ss_firstspell_t0_start`a'_occ_v`v'_def`d'.dta", replace v(12)
	local datadir T:\${data_folder}\
	sum ln_y
	bys exp_grp origin: sum ln_y
	restore

	* outcomes 1 to 5 years after entry *
	forvalues i = 1/5 {
		preserve
		* keep only individuals observed in the main panel X years after first entry into entrepreneurship, regardless of where they are in that year *
		keep if alive_f`i' == 1
		* calculate entrepreneurial income in that year *
		* only first spell rows remain, so y_f is missing when the spell ended earlier
		gen tmp = ln_y if tax_yr == first_yr_spell1+`i'
		sort pid tax_yr
		by pid: egen y_f`i' = max(tmp)
		drop tmp
		keep if tax_yr == first_yr_spell1
		* growth of earnings in levels relative to the entry year
		gen gr_f`i' = (exp(y_f`i')-exp(ln_y))/exp(ln_y)
		* display number of individuals *
		count
		local datadir T:\${data_folder}\gc_esamples\
		saveold "`datadir'gc_ss_firstspell_t`i'_start`a'_occ_v`v'_def`d'.dta", replace v(12)
		local datadir T:\${data_folder}\
		* fraction surviving at least X years in entrepreneurship after first entry and average entrepreneurial earnings in that year*
		sum ent_f`i' y_f`i' gr_f`i'
		bys exp_grp origin: sum ent_f`i' y_f`i' gr_f`i'
		restore
	}
	
}
}
}
disp "***** STEP 6: more facts 3: first spell stuff (COMPLETED) *****"
}
disp "***** Finished processing STEP 6 *****"
endtime

*****************************************************
* STEP 6: more facts 3: first spell stuff, selected *
*****************************************************
* Same construction as the first spell step above, but the statistics are
* restricted to entrepreneurs whose first spell lasts at least five years
* after entry (ent_f5 == 1). Controlled by gate6b; log messages are labelled
* STEP 6b so they can be told apart from the unselected step.
* Side effects: saves gc_ss_fs_selec_t0 ... t5 and gc_ss_fs_selec_g datasets
* in the gc_esamples folder.
disp "***** Started processing STEP 6b *****"
starttime
if 1 == `gate6b' {
disp "***** STEP 6b: more facts 3: first spell stuff, selected *****"
foreach a of global start_age {
foreach v of global occ_version {
foreach d of global class_def {

	local datadir T:\${data_folder}\
	
	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta", clear
	
	display "......................................................................................."
	display "... FIRST SPELL STUFF FOR `gender', START AGE: `a', OCC V: `v', FIRM CLASS: DEF `d' ..."
	display "......................................................................................."

	* the lags below index rows with _n inside each pid, so make the
	* chronological ordering explicit instead of relying on the stored sort order
	sort pid tax_yr

	* one and two period lags of the industry codes (2, 3 and 4 digit NAICS) *
	forvalues i = 1/2 {
		foreach k in 2 3 4 {
			gen L`i'naics`k' = .
			by pid: replace L`i'naics`k' = naics`k'_int[_n - `i'] if period > `i'
		}
	}
	
	* one and two period lags of the place of residence (CMA code, province) *
	forvalues i = 1/2 {
		foreach geo in res_cma_code res_prov {
			gen L`i'`geo' = ""
			by pid: replace L`i'`geo' = `geo'[_n - `i'] if period > `i'
		}
	}
	
	* identify first and last year as entrepreneur in the data *
	gen tmp = tax_yr if action == 30
	by pid: egen first_yr_ent = min(tmp)
	by pid: egen last_yr_ent = max(tmp)
	drop tmp
	
	* keep only individuals who have an entrepreneurial spell in the data *
	keep if first_yr_ent != .
	
	* drop observations before first year as entrepreneur or after last year as entrepreneur *
	drop if tax_yr < first_yr_ent
	drop if tax_yr > last_yr_ent
	
	sort pid tax_yr
	
	* keep track of where entrepreneur is originally coming from. *
	* the second lag is used when the first lag of action equals 90
	gen tmp_laction = Laction if tax_yr == first_yr_ent
	replace tmp_laction = L2action if tmp_laction == 90 & tax_yr == first_yr_ent
	by pid: egen origin = max(tmp_laction)
	drop tmp_laction
	
	* keep only first entrepreneurial spell *
	* while any person has a gap in tax years, drop the last year of that
	* person and re-check; the loop ends when every remaining spell is contiguous
	keep if action == 30
	local n_gappy = 1
	while `n_gappy' != 0 {
		sort pid tax_yr
		by pid: egen first_year = min(tax_yr)
		by pid: egen last_year = max(tax_yr)
		by pid: gen obs = _N
		gen potential_obs = last_year-first_year+1
		count if potential_obs > obs
		local n_gappy = r(N)
		display `n_gappy'
		* drops nothing once no gaps are left
		drop if tax_yr == last_year & potential_obs > obs
		drop first_year last_year obs potential_obs
	}
	
	* identify first and last year of first entrepreneurial spell *
	sort pid tax_yr
	by pid: egen first_yr_spell1 = min(tax_yr)
	by pid: egen last_yr_spell1 = max(tax_yr)
	
	* display number of entrepreneurs in the data*
	count if tax_yr == first_yr_ent
	
	* identify individuals observed in the main panel X years after first entry into entrepreneurship, regardless of where they are in that year *
	* note: a missing exit_yr compares as larger than any year, so the flag is 1
	forvalues i = 1/5 {
		gen alive_f`i' = first_yr_ent+`i'<=exit_yr
	}
	
	* identify individuals who survive at least X years in entrepreneurship after first entry *
	forvalues i = 1/5 {
		gen ent_f`i' = first_yr_ent+`i'<=last_yr_spell1
	}

	* construct potential experience groups *
	* years since the start age, pooled into groups 0, 1 (1-2), 3 (3-4), 5 (5+);
	* the missing() guard keeps a missing age from being pooled into the top
	* group, because missing compares as larger than any number
	gen exp_grp =age-`a'
	replace exp_grp = 1 if exp_grp == 2
	replace exp_grp = 3 if exp_grp == 4
	replace exp_grp = 5 if exp_grp > 5 & !missing(exp_grp)
	
	display "report some summary statistics about first entrepreneurial spell"
	
	* selection: keep only entrepreneurs whose first spell lasts at least 5 years after entry *
	keep if ent_f5 == 1

	* first year as entrepreneur *
	preserve
	keep if tax_yr == first_yr_spell1
	
	* display number of entrepreneurs *
	count

	* summarize entrepreneurial earnings in first year *
	local datadir T:\${data_folder}\gc_esamples\
	saveold "`datadir'gc_ss_fs_selec_t0_start`a'_occ_v`v'_def`d'.dta", replace v(12)
	local datadir T:\${data_folder}\
	sum ln_y
	bys exp_grp origin: sum ln_y
	restore

	* outcomes 1 to 5 years after entry *
	forvalues i = 1/5 {
		preserve
		* keep only individuals observed in the main panel X years after first entry into entrepreneurship, regardless of where they are in that year *
		keep if alive_f`i' == 1
		* calculate entrepreneurial income in that year *
		gen tmp = ln_y if tax_yr == first_yr_spell1+`i'
		sort pid tax_yr
		by pid: egen y_f`i' = max(tmp)
		drop tmp
		keep if tax_yr == first_yr_spell1
		* growth of earnings in levels relative to the entry year
		gen gr_f`i' = (exp(y_f`i')-exp(ln_y))/exp(ln_y)
		* display number of individuals *
		count
		local datadir T:\${data_folder}\gc_esamples\
		saveold "`datadir'gc_ss_fs_selec_t`i'_start`a'_occ_v`v'_def`d'.dta", replace v(12)
		local datadir T:\${data_folder}\
		* fraction surviving at least X years in entrepreneurship after first entry and average entrepreneurial earnings in that year*
		sum ent_f`i' y_f`i' gr_f`i'
		bys exp_grp origin: sum ent_f`i' y_f`i' gr_f`i'
		restore
	}

	* earnings growth between the 1st and the 6th year of the spell *
	* t numbers the years of the spell; one row per person (t == 1) is kept
	sort pid_int tax_yr
	by pid_int: gen t = _n
	keep if t <=6
	gen yt6 = ln_y if t == 6
	gen yt1 = ln_y if t == 1
	by pid_int: egen y6 = max(yt6)
	by pid_int: egen y1 = max(yt1)
	keep if t == 1
	gen growth = (exp(y6)-exp(y1))/exp(y1)
	local datadir T:\${data_folder}\gc_esamples\
	saveold "`datadir'gc_ss_fs_selec_g_start`a'_occ_v`v'_def`d'.dta", replace v(12)
	local datadir T:\${data_folder}\
	sum growth
	bys exp_grp origin: sum growth
}
}
}
disp "***** STEP 6b: more facts 3: first spell stuff, selected (COMPLETED) *****"
}
disp "***** Finished processing STEP 6b *****"
endtime

***********************************************************
* STEP 8: plot the distribution of earnings by occupation *
***********************************************************
* Builds, for tax year 2012 and ages 25 to 55, the distribution of log income
* from the main occupation (1 = paid employee, 2 = unincorporated,
* 3 = incorporated) over 11 bins: ln_y <= 9.5, nine half log point bins up to
* 14, and ln_y > 14. The bin shares are drawn as grouped bars, once with and
* once without a legend.
* Side effects: saves gc_ss_lny_1/2/3_dist and gc_ss_lny_dist datasets in the
* gc_esamples folder and two .gph files in the output folder; the temporary
* gc_kdensity_tmp files are erased at the end.
disp "***** Started processing STEP 8 *****"
starttime
if 1 == `gate8' {
	disp "***** STEP 8: plot the distribution of earnings by occupation *****"
	local datadir T:\${data_folder}\
	
	use "`datadir'gc_individual_panel_`gender'_`ext'.dta", clear
	
	sort pid tax_yr
	
	if "`gender'" == "men" {
		keep if sex == 1
	}

	if "`gender'" == "women" {
		keep if sex == 2
	}
	
	keep if age>= 25 & age <= 55
	
	drop if inimdb == 1
	
	keep if tax_yr == 2012
	
	************************************************************************
	* Construct various occupational categories based on sources of income *
	************************************************************************
	* NOTE(review): a missing income compares as larger than any number, so a
	* missing y_3_all, y_2 or y_1 satisfies the >= y_0 tests below. The rule is
	* left unchanged here because other steps build occ_v1 the same way;
	* confirm that these income variables are never missing.
	* nonemployed *
	gen occ_v1 = 0
	* incorporated *
	replace occ_v1 = 3 if y_3_all >= y_0
	* unincorporated *
	replace occ_v1 = 2 if occ_v1 == 0 & y_2 >= y_0
	* paid employee *
	replace occ_v1 = 1 if occ_v1 == 0 & y_1 >= y_0
	label var occ_v1 "Occupation, v1"
	* construct relevant measure of income *
	gen y_v1 = .
	replace y_v1 = y_1 if occ_v1 == 1
	replace y_v1 = y_2 if occ_v1 == 2
	replace y_v1 = y_3_all if occ_v1 == 3
	label var y_v1 "Annual income (main occupation), v1"
	
	**************************
	* merge firm information *
	**************************
	merge m:1 eid_long using "`datadir'gc_firm_info_`ext'.dta"
	drop if _merge == 2
	drop _merge
	ds
	
	*********************************************
	* drop worker with missing firm information *
	*********************************************
	display "...Drop worker with missing firm information..."
	drop if occ_v1 == 1 & f2_rev_perL_3c == .

	* drop unemployed *
	drop if occ_v1 == 0
	
	gen ln_y = ln(y_v1)

	* ln() is missing for missing or non-positive income. Missing compares as
	* larger than any number, so such rows would satisfy ln_y > 14 and be
	* counted in the top bin; report how many there are and exclude them.
	display "...Drop observations with undefined log income..."
	count if missing(ln_y)
	drop if missing(ln_y)
	
	foreach o in 1 2 3 {
		preserve
		keep if occ_v1 == `o'
		* bin and counter hold the same 0/1 indicator: bin is averaged into a
		* share and counter is summed into a cell count by the collapse below
		gen bin0 = ln_y <= 9.5
		gen counter0 = ln_y <= 9.5
		* bins 1 to 9 cover (9.5 + (i-1)/2, 9.5 + i/2]
		forvalues i = 1/9 {
			local j = `i'-1
			gen bin`i' = ln_y > 9.5 + `j'*.5  & ln_y <= 9.5 + `i'*.5
			gen counter`i' = ln_y > 9.5 + `j'*.5  & ln_y <= 9.5 + `i'*.5
		}
		gen bin10 = ln_y > 14
		gen counter10 = ln_y > 14
		local datadir T:\${data_folder}\gc_esamples\
		saveold "`datadir'gc_ss_lny_`o'_dist.dta", replace v(12)
		sum bin*
		collapse (mean) bin* (sum) counter*
		gen occ_v1 = `o'
		local datadir T:\${data_folder}\
		save "`datadir'gc_kdensity_tmp`o'_`ext'.dta", replace
		restore
	}

	****************************
	* MAKE GRAPH BY OCCUPATION *
	****************************
	local datadir T:\${data_folder}\
	use "`datadir'gc_kdensity_tmp1_`ext'.dta", clear
	append using "`datadir'gc_kdensity_tmp2_`ext'.dta"
	append using "`datadir'gc_kdensity_tmp3_`ext'.dta"
	reshape long bin counter, i(occ_v1) j(cats)
	rename bin frac
	* make numbers to get bars in the right place *
	* each income bin (cats 0 to 10) takes four x positions: unincorporated,
	* worker, incorporated, then one empty slot that separates the groups
	gen barnum = .
	replace barnum = 4*cats + 1 if occ_v1 == 2 & inrange(cats, 0, 10)
	replace barnum = 4*cats + 2 if occ_v1 == 1 & inrange(cats, 0, 10)
	replace barnum = 4*cats + 3 if occ_v1 == 3 & inrange(cats, 0, 10)
	
	sort barnum
	local datadir T:\${data_folder}\gc_esamples\
	saveold "`datadir'gc_ss_lny_dist.dta", replace v(12)
	* set up graph options *
	local datadir T:\${output_folder}\
	disp "`datadir'"
	* x labels sit on the worker bar of every second bin *
	twoway (bar frac barnum if occ_v1 == 2, color(gs12)) (bar frac barnum if occ_v1 == 1, color(gs7)) (bar frac barnum if occ_v1 == 3, color(gs2)), ///
	legend(order(1 "Unincorporated" 2 "Worker" 3 "Incorporated") c(3) position(6) region(lstyle(none))) ///
	xlabel(2 "< 9.5" 10 "10-10.5" 18 "11-11.5" 26 "12-12.5" 34 "13-13.5" 42 ">14") ///
	xsize(11) ysize(8.5) xtitle("") ytitle("") bgcolor(white) graphregion(color(white)) plotregion(ls(none)) scheme(s1mono)
	graph save "`datadir'gc_${date}_ss_lny_dist_legend_`gender'_`ext'.gph", replace
	
	twoway (bar frac barnum if occ_v1 == 2, color(gs12)) (bar frac barnum if occ_v1 == 1, color(gs7)) (bar frac barnum if occ_v1 == 3, color(gs2)), ///
	legend(off) ///
	xlabel(2 "< 9.5" 10 "10-10.5" 18 "11-11.5" 26 "12-12.5" 34 "13-13.5" 42 ">14") ///
	xsize(11) ysize(8.5) xtitle("") ytitle("") bgcolor(white) graphregion(color(white)) plotregion(ls(none)) scheme(s1mono)
	graph save "`datadir'gc_${date}_ss_lny_dist_`gender'_`ext'.gph", replace
	
	* delete intermediate datasets *
	local datadir T:\${data_folder}\
	capture erase "`datadir'gc_kdensity_tmp1_`ext'.dta"
	capture erase "`datadir'gc_kdensity_tmp2_`ext'.dta"
	capture erase "`datadir'gc_kdensity_tmp3_`ext'.dta"
	clear
	
	disp "***** STEP 8: plot the distribution of earnings by occupation (COMPLETED) *****"
}
disp "***** Finished processing STEP 8 *****"
endtime

***********************************************************************
* STEP 9: plot the share of entrepreneurs over time, cohort by cohort *
***********************************************************************
* log the start of STEP 9 (the message previously said STEP 8) and start its timer
disp "***** Started processing STEP 9 *****"
starttime
if 1 == `gate9' ///
{
	disp "***** STEP 6: plot the distribution of earnings by occupation *****"
	local datadir T:\${data_folder}\
	
	use "`datadir'gc_individual_panel_`gender'_`ext'.dta", clear
	
	sort pid tax_yr
	
	if "`gender'" == "men" ///
	{
		keep if sex == 1
	}

	if "`gender'" == "women" ///
	{
		keep if sex == 2
	}
	
	keep if age>= 25 & age <= 55
	
	drop if inimdb == 1
	
	*keep if tax_yr == 2012
	
	************************************************************************
	* Construct various occupational categories based on sources of income *
	************************************************************************
	* nonemployed *
	gen occ_v1 = 0
	* incorporated *
	replace occ_v1 = 3 if y_3_all >= y_0
	* unincorporated *
	replace occ_v1 = 2 if occ_v1 == 0 & y_2 >= y_0
	* paid employee *
	replace occ_v1 = 1 if occ_v1 == 0 & y_1 >= y_0
	label var occ_v1 "Occupation, v1"
	
	**************************
	* merge firm information *
	**************************
	merge m:1 eid_long using "`datadir'gc_firm_info_`ext'.dta"
	drop if _merge == 2
	drop _merge
	ds
	
	*********************************************
	* drop worker with missing firm information *
	*********************************************
	display "...Drop worker with missing firm information..."
	drop if occ_v1 == 1 & f2_rev_perL_3c == .
	
	sort tax_yr dob_yr
	gen occ3=0
	replace occ3 = 1 if occ_v1 == 3
	gen occ_all = 1
	bys tax_yr dob_yr: egen share_occ3 = mean(occ3)
	keep tax_yr dob_yr occ3 occ_all share_occ3 pid
	sort dob_yr tax_yr
	local datadir T:\${data_folder}\gc_esamples\
	saveold "`datadir'gc_ss_lifecycle_cohort.dta", replace v(12)
	
	local datadir T:\${data_folder}\gc_esamples\
	use "`datadir'gc_ss_lifecycle_cohort.dta", clear
	bys dob_yr tax_yr: sum occ3
	keep tax_yr dob_yr share_occ3
	duplicates drop
	sort dob_yr tax_yr
	gen age = tax_yr - dob_yr
	
	* old cohorts *
	local mc1 = "gs11"
	local m1 = "diamond"
	local msize1 = "medsmall"
	local mfc1 = "white"
	local mlc1 = "gs11"
	local lcolor1 = "gs11"
	local lpattern1 = "solid"
	local lwidth1 = "vthin"
	
	* young cohorts *
	local mc2 = "black"
	local m2 = "diamond"
	local msize2 = "medsmall"
	local mfc2 = "white"
	local mlc2 = "black"
	local lcolor2 = "black"
	local lpattern2 = "solid"
	local lwidth2 = "vthin"
	
	* 25 *
	local mc25 = "none"
	local m25 = "diamond"
	local msize25 = "medsmall"
	local mfc25 = "red"
	local mlc25 = "none"
	local lcolor25 = "red"
	local lpattern25 = "dash"
	local lwidth25 = "vthin"
	
	* 30 *
	local mc30 = "none"
	local m30 = "diamond"
	local msize30 = "medsmall"
	local mfc30 = "blue"
	local mlc30 = "none"
	local lcolor30 = "blue"
	local lpattern30 = "dash"
	local lwidth30 = "vthin"
	
	* 35 *
	local mc35 = "none"
	local m35 = "diamond"
	local msize35 = "medsmall"
	local mfc35 = "orange"
	local mlc35 = "none"
	local lcolor35 = "orange"
	local lpattern35 = "dash"
	local lwidth35 = "vthin"
	
	* 40 *
	local mc40 = "none"
	local m40 = "diamond"
	local msize40 = "medsmall"
	local mfc40 = "green"
	local mlc40 = "none"
	local lcolor40 = "green"
	local lpattern40 = "dash"
	local lwidth40 = "vthin"
	
	* 45 *
	local mc45 = "none"
	local m45 = "diamond"
	local msize45 = "medsmall"
	local mfc45 = "purple"
	local mlc45 = "none"
	local lcolor45 = "purple"
	local lpattern45 = "dash"
	local lwidth45 = "vthin"
	
	* 50 *
	local mc50 = "none"
	local m50 = "diamond"
	local msize50 = "medsmall"
	local mfc50 = "pink"
	local mlc50 = "none"
	local lcolor50 = "pink"
	local lpattern50 = "dash"
	local lwidth50 = "vthin"
	
	* 55 *
	local mc55 = "none"
	local m55 = "diamond"
	local msize55 = "medsmall"
	local mfc55 = "yellow"
	local mlc55 = "none"
	local lcolor55 = "yellow"
	local lpattern55 = "dash"
	local lwidth55 = "vthin"
	
	* set up graph options *
	local datadir T:\${output_folder}\
	disp "`datadir'"
	
	twoway ///
	(connected share_occ3 tax_yr if dob_yr == 1946, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1947, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1948, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1949, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1950, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1951, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1952, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1953, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1954, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1955, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1956, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1957, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1958, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1959, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1960, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1961, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1962, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1963, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1964, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1965, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1966, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1967, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1968, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1969, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1970, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1971, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1972, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1973, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1974, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1975, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1976, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1977, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1978, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1979, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1980, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1981, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1982, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1983, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1984, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1985, c(l) mc(`mc2') m(`m2') msize(`msize2') mfc(`mfc2') mlc(`mlc2') lcolor(`lcolor2') lpattern(`lpattern2') lwidth(`lwidth2')) ///
	(connected share_occ3 tax_yr if dob_yr == 1986, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if dob_yr == 1987, c(l) mc(`mc1') m(`m1') msize(`msize1') mfc(`mfc1') mlc(`mlc1') lcolor(`lcolor1') lpattern(`lpattern1') lwidth(`lwidth1')) ///
	(connected share_occ3 tax_yr if age == 25, c(l) mc(`mc25') m(`m25') msize(`msize25') mfc(`mfc25') mlc(`mlc25') lcolor(`lcolor25') lpattern(`lpattern25') lwidth(`lwidth25')) ///
	(connected share_occ3 tax_yr if age == 30, c(l) mc(`mc30') m(`m30') msize(`msize30') mfc(`mfc30') mlc(`mlc30') lcolor(`lcolor30') lpattern(`lpattern30') lwidth(`lwidth30')) ///
	(connected share_occ3 tax_yr if age == 35, c(l) mc(`mc35') m(`m35') msize(`msize35') mfc(`mfc35') mlc(`mlc35') lcolor(`lcolor35') lpattern(`lpattern35') lwidth(`lwidth35')) ///
	(connected share_occ3 tax_yr if age == 40, c(l) mc(`mc40') m(`m40') msize(`msize40') mfc(`mfc40') mlc(`mlc40') lcolor(`lcolor40') lpattern(`lpattern40') lwidth(`lwidth40')) ///
	(connected share_occ3 tax_yr if age == 45, c(l) mc(`mc45') m(`m45') msize(`msize45') mfc(`mfc45') mlc(`mlc45') lcolor(`lcolor45') lpattern(`lpattern45') lwidth(`lwidth45')) ///
	(connected share_occ3 tax_yr if age == 50, c(l) mc(`mc50') m(`m50') msize(`msize50') mfc(`mfc50') mlc(`mlc50') lcolor(`lcolor50') lpattern(`lpattern50') lwidth(`lwidth50')) ///
	(connected share_occ3 tax_yr if age == 55, c(l) mc(`mc55') m(`m55') msize(`msize55') mfc(`mfc55') mlc(`mlc55') lcolor(`lcolor55') lpattern(`lpattern55') lwidth(`lwidth55')) ///
	, legend(off) ///
	xsize(11) ysize(8.5) xlabel(2001(1)2012) ylabel(0(.02).08) xtitle("") ytitle("") bgcolor(white) graphregion(color(white)) plotregion(ls(none)) scheme(s1mono)
	graph save "`datadir'gc_${date}_stylized_lifecycle_cohort_`ext'.gph", replace
	
	* Legend version of the cohort / age profile graph: share_occ3 against tax_yr,
	* one connected line per birth cohort (dob_yr 1946-1987) followed by one per
	* age (25, 30, ..., 55).  The plot specifications are assembled in loops.
	* The order must stay "cohorts first, then ages" because legend(order())
	* below refers to plots by position (1 = 1946, 31 = 1976, 43-49 = ages 25-55).
	local lc_plots
	forvalues yob = 1946/1987 {
		* cohorts 1976-1985 use style macro set 2, every other cohort uses set 1
		local sty = cond(inrange(`yob', 1976, 1985), 2, 1)
		local lc_plots `lc_plots' (connected share_occ3 tax_yr if dob_yr == `yob', c(l) mc(`mc`sty'') m(`m`sty'') msize(`msize`sty'') mfc(`mfc`sty'') mlc(`mlc`sty'') lcolor(`lcolor`sty'') lpattern(`lpattern`sty'') lwidth(`lwidth`sty''))
	}
	forvalues ag = 25(5)55 {
		* each age line uses its own age-specific style macros (mc25, m25, ...)
		local lc_plots `lc_plots' (connected share_occ3 tax_yr if age == `ag', c(l) mc(`mc`ag'') m(`m`ag'') msize(`msize`ag'') mfc(`mfc`ag'') mlc(`mlc`ag'') lcolor(`lcolor`ag'') lpattern(`lpattern`ag'') lwidth(`lwidth`ag''))
	}
	
	* Only one key per cohort style group is shown, plus one key per age line.
	twoway `lc_plots' ///
	, legend(order(1 "1946-1975 and 1986-1987" 31 "1976-1985" 43 "Age 25" 44 "Age 30" 45 "Age 35" 46 "Age 40" 47 "Age 45" 48 "Age 50" 49 "Age 55") c(3) position(6) region(lstyle(none))) ///
	xsize(11) ysize(8.5) xlabel(2001(1)2012) ylabel(0(.02).08) xtitle("") ytitle("") bgcolor(white) graphregion(color(white)) plotregion(ls(none)) scheme(s1mono)
	
	* Store the graph in the data folder, overwriting any previous version.
	graph save "`datadir'gc_${date}_stylized_lifecycle_cohort_legend_`ext'.gph", replace
	
	* Drop the data in memory now that the graphs above have been saved.
	clear
	
	* Completion message for STEP 9.  The text previously repeated the STEP 8
	* description ("distribution of earnings by occupation"); it now matches the
	* STEP 9 description used in the gate list at the top of the file.
	* NOTE(review): confirm the matching start-of-step message for STEP 9 uses
	* the same description.
	disp "***** STEP 9: plot the share of entrepreneurs over time, cohort by cohort (COMPLETED) *****"
}
disp "***** Finished processing STEP 9 *****"
* endtime is a program defined elsewhere; presumably it reports the elapsed
* time since the preceding starttime call -- TODO confirm.
endtime


********************************************
* STEP X: delete all intermediate datasets *
********************************************
* Announce the step; starttime/endtime are programs defined elsewhere
* (presumably they time the step -- TODO confirm).
disp "***** Started processing STEP X *****"
starttime

* The clean-up only runs when the gateX switch at the top of the file is 1.
if `gateX' == 1 {
	disp "***** STEP X: delete all intermediate datasets *****"

	* Same definition of datadir as at the top of the file.
	local datadir T:\${data_folder}\

	* One temporary dataset may exist per (start age, occupation version,
	* firm class definition) combination listed in the corresponding globals.
	foreach a in $start_age {
		foreach v in $occ_version {
			foreach d in $class_def {
				* capture: a missing file is not treated as an error
				capture erase "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_def`d'_tmp.dta"
			}
		}
	}

	disp "***** STEP X: delete all intermediate datasets (COMPLETED) *****"
}

disp "***** Finished processing STEP X *****"
endtime

********
* EXIT *
********
* Clear Stata's memory before leaving the do-file.
clear all
* Close the log file opened with -log using- near the top of this do-file.
log close
